#Gärtner et al. Plasmacytoid dendritic cells regulate tissue homeostasis of megakaryocytes
library(SoupX)
## Warning: package 'SoupX' was built under R version 4.1.2
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(patchwork)
library(sctransform)
## Warning: package 'sctransform' was built under R version 4.1.2
library(ggplot2)
library(dittoSeq)
library(RColorBrewer)
## Warning: package 'RColorBrewer' was built under R version 4.1.2
library(data.table)
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
library(magrittr)
## Warning: package 'magrittr' was built under R version 4.1.2
library(readxl)
## Warning: package 'readxl' was built under R version 4.1.2
library(reshape2)
##
## Attaching package: 'reshape2'
## The following objects are masked from 'package:data.table':
##
## dcast, melt
library(enrichR)
## Warning: package 'enrichR' was built under R version 4.1.2
## Welcome to enrichR
## Checking connection ...
## Enrichr ... Connection is Live!
## FlyEnrichr ... Connection is Live!
## WormEnrichr ... Connection is Live!
## YeastEnrichr ... Connection is Live!
## FishEnrichr ... Connection is Live!
## OxEnrichr ... Connection is Live!
library(Seurat)
## Warning: package 'Seurat' was built under R version 4.1.2
## Attaching SeuratObject
library(clusterProfiler)
## Warning: package 'clusterProfiler' was built under R version 4.1.2
##
## Registered S3 method overwritten by 'ggtree':
## method from
## identify.gg ggfun
## clusterProfiler v4.2.2 For help: https://yulab-smu.top/biomedical-knowledge-mining-book/
##
## If you use clusterProfiler in published research, please cite:
## T Wu, E Hu, S Xu, M Chen, P Guo, Z Dai, T Feng, L Zhou, W Tang, L Zhan, X Fu, S Liu, X Bo, and G Yu. clusterProfiler 4.0: A universal enrichment tool for interpreting omics data. The Innovation. 2021, 2(3):100141
##
## Attaching package: 'clusterProfiler'
##
## The following object is masked from 'package:stats':
##
## filter
library(stringr)
## Warning: package 'stringr' was built under R version 4.1.2
library(EnhancedVolcano)
## Loading required package: ggrepel
library(dittoSeq)
library(ComplexHeatmap)
## Loading required package: grid
## ========================================
## ComplexHeatmap version 2.10.0
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
##
## If you use it in published research, please cite:
## Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional
## genomic data. Bioinformatics 2016.
##
## The new InteractiveComplexHeatmap package can directly export static
## complex heatmaps into an interactive Shiny app with zero effort. Have a try!
##
## This message can be suppressed by:
## suppressPackageStartupMessages(library(ComplexHeatmap))
## ========================================
library(RColorBrewer)
library(circlize)
## Warning: package 'circlize' was built under R version 4.1.2
## ========================================
## circlize version 0.4.15
## CRAN page: https://cran.r-project.org/package=circlize
## Github page: https://github.com/jokergoo/circlize
## Documentation: https://jokergoo.github.io/circlize_book/book/
##
## If you use it in published research, please cite:
## Gu, Z. circlize implements and enhances circular visualization
## in R. Bioinformatics 2014.
##
## This message can be suppressed by:
## suppressPackageStartupMessages(library(circlize))
## ========================================
#Reading a Seurat Object: The Seurat object MKP3HTO is loaded from an RDS file stored locally. RDS is a format used in R for saving and loading objects. #Setting the Default Assay: The default assay of the Seurat object is set to ‘RNA’, which specifies the type of data (RNA sequencing data) to be used in subsequent analyses. #UMAP Visualization for Clusters: A UMAP (Uniform Manifold Approximation and Projection) plot is created to visualize data clusters based on ‘HTO_classification’, a specific type of cell classification. UMAP is a dimensionality reduction technique often used in single-cell data analysis. #Shuffled UMAP Plot for Clustering Robustness: A shuffled UMAP plot is generated, likely to assess the robustness of the clustering. Shuffling the data can help in understanding how well the clusters are defined. #UMAP Plot with Seurat Clusters: Another UMAP plot is created, this time showing clusters identified by Seurat’s clustering algorithm, with each cluster labeled. #Identifying Marker Genes: The script identifies marker genes in each cluster using statistical methods (here, Wilcoxon test). Marker genes are those significantly expressed in specific clusters and can help in characterizing cell types. #Selecting Top Marker Genes: The top two marker genes per cluster are selected based on log fold change, highlighting the most differentially expressed genes in each cluster. #Preparation for Dot Plot: The script prepares data for the top 10 marker genes per cluster for a dot plot visualization. This involves selecting genes based on their average log2 fold change. #Dot Plot for Marker Genes: Finally, a dot plot is created to visualize the expression of these top 10 marker genes across the different clusters. This type of plot is useful for identifying genes that define each cluster’s characteristics.
# Reading the Seurat object from an RDS file
MKP3HTO = readRDS("~/Downloads/final_MKP3HTO.RDS") # Replace with your file path
# Setting the default assay to 'RNA' for the Seurat object
DefaultAssay(MKP3HTO) <- "RNA"
# Creating a UMAP plot to visualize clusters based on HTO classification
print(DimPlot(MKP3HTO, group.by = "HTO_classification"))
# Indicating a figure label for manuscript reference
print("Manuscript Extended Data Figure 10 a")
## [1] "Manuscript Extended Data Figure 10 a"
# Creating a shuffled UMAP plot for robustness check of clustering
print(DimPlot(MKP3HTO, shuffle = T, seed = 1, group.by= "HTO_classification", split.by= "HTO_classification", ncol=3, raster=FALSE))
# Indicating another figure label for manuscript reference
print("Manuscript Figure 4D")
## [1] "Manuscript Figure 4D"
# Creating a UMAP plot showing different Seurat clusters with labels
print(DimPlot(MKP3HTO, group.by= "seurat_clusters", raster=FALSE, label = TRUE))
# Finding all significant marker genes in the Seurat object
MKP3HTO_markers <- FindAllMarkers(MKP3HTO, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25, test.use = "wilcox")
## Calculating cluster Mixed_progenitors
## Calculating cluster Late_MKP
## Calculating cluster MK-MEP
## Calculating cluster MK-MEP_cycling
## Calculating cluster GMP
## Calculating cluster Early_MKP
## Calculating cluster Immature_neutrophils
## Calculating cluster Basophils
## Calculating cluster NK
## Calculating cluster Erythroid
# Selecting the top 2 marker genes per cluster based on log fold change
MKP3HTO_markers %>%
group_by(cluster) %>%
slice_max(n = 2, order_by = avg_log2FC)
## # A tibble: 20 × 7
## # Groups: cluster [10]
## p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene
## <dbl> <dbl> <dbl> <dbl> <dbl> <fct> <chr>
## 1 0 1.39 0.894 0.494 0 Mixed_progenitors AY036118
## 2 0 1.13 0.996 0.976 0 Mixed_progenitors Camk1d
## 3 0 4.31 0.965 0.358 0 Late_MKP Tsc22d1
## 4 0 4.14 0.999 0.688 0 Late_MKP Ctla2a
## 5 4.45e-252 1.28 0.835 0.223 8.69e-248 MK-MEP Apoe
## 6 7.00e-178 1.18 0.987 0.715 1.37e-173 MK-MEP Pdcd4
## 7 8.48e-199 1.27 0.944 0.389 1.66e-194 MK-MEP_cycling H2afx
## 8 6.68e-180 1.25 0.898 0.383 1.30e-175 MK-MEP_cycling Hist1h2ap
## 9 0 3.32 0.924 0.219 0 GMP Lgals1
## 10 1.73e-246 3.13 0.9 0.414 3.38e-242 GMP Prtn3
## 11 1.19e-176 2.58 0.915 0.326 2.33e-172 Early_MKP Prkg1
## 12 2.63e-157 2.42 0.994 0.551 5.13e-153 Early_MKP Pbx1
## 13 1.29e-137 7.16 0.918 0.373 2.51e-133 Immature_neutrophils S100a9
## 14 7.94e-120 6.90 0.828 0.319 1.55e-115 Immature_neutrophils S100a8
## 15 0 7.06 0.855 0.102 0 Basophils Prss34
## 16 0 6.15 0.897 0.09 0 Basophils Mcpt8
## 17 1.17e-255 4.90 0.453 0.024 2.29e-251 NK Ccl5
## 18 0 3.43 0.608 0.009 0 NK Trbc2
## 19 1.05e- 51 6.70 0.679 0.333 2.05e- 47 Erythroid Hbb-bs
## 20 1.43e-101 6.38 0.69 0.173 2.80e- 97 Erythroid Hbb-bt
# Preparing data for the top 10 marker genes per cluster for plotting
MKP3HTO_markers %>%
group_by(cluster) %>%
top_n(n = 10, wt = avg_log2FC) -> top10
# Indicating a dot plot figure label for manuscript reference
print("Dotplot of cluster defining genes")
## [1] "Dotplot of cluster defining genes"
# Indicating another figure label for manuscript reference
print("Manuscript Extended Data Figure 10 b")
## [1] "Manuscript Extended Data Figure 10 b"
# Creating a dot plot to visualize the top 10 marker genes in clusters
print(DotPlot(MKP3HTO, features = unique(top10$gene))+ RotatedAxis())
print("getting proportion of cells per cluster for each condition")
## [1] "getting proportion of cells per cluster for each condition"
# Indicating the figure title for manuscript reference
print("Manuscript Figure 4E")
## [1] "Manuscript Figure 4E"
# Generating a bar plot showing the proportion of cells per cluster
# 'object': specifies the Seurat object (MKP3HTO) to be used
# 'var': the variable ('CSclassification') used to group the cells
# 'group.by': clustering resolution, here at 0.25, to categorize cells
dittoBarPlot(
object = MKP3HTO,
var = "CSclassification",
group.by = "RNA_snn_res.0.25")
# Reading the bulk sequencing differentially expressed genes from a CSV file
bulk_de_genes = read.csv("~/Downloads/MKP_samples_log2FC_forVisha.csv", header = TRUE)
# Setting the gene names as row names for the data frame
rownames(bulk_de_genes) = bulk_de_genes$Gene_name
# Selecting genes upregulated in PD vs CNTRL
genes_pd_vs_cntrl <- subset(bulk_de_genes, log2FC_PD_vs_Ctrl > 0)
genes_pd_vs_cntrl = rownames(genes_pd_vs_cntrl)
# Selecting genes downregulated in PDC+PD vs CNTRL
genes_pdpdc_vs_pd = subset(bulk_de_genes, log2FC_PD_DT_vs_PD < 0)
genes_pdpdc_vs_pd = rownames(genes_pdpdc_vs_pd)
# Finding the intersection of upregulated and downregulated genes
final_genes = intersect(genes_pd_vs_cntrl, genes_pdpdc_vs_pd)
# Adding a module score for these genes in the Seurat object
MKP3HTO <- AddModuleScore(MKP3HTO, features = list(final_genes), name = "final_genes")
## Warning: The following features are not present in the object: 1810021M19Rik,
## D2Wsu81e, Gm40508, Shfm1, LOC108167358, C330027C09Rik, Gm15264, Sgol1,
## Mir703, Gm40227, Vimp, Gm5812, Gm4617, Tdpx-ps1, 2700094K13Rik, Gm36368,
## Helt, LOC105247125, Gm42312, 5730422E09Rik, Gm36876, Yy2, Gm31619, Ddx26b,
## Gm41847, Gm32673, Fbxl16, LOC108168971, Fdx1l, Rplp0, Gm39923, Gm13394, Tusc5,
## Gm15452, Gm40017, Gm3740, LOC108169013, Gm36673, Rps15a-ps4, Gm14176, Gm6361,
## LOC108167318, 1110018N20Rik, 1810022K09Rik, Rpl37a, Rps2, Gm6532, Gm10705, Rpl8,
## Rps6, Gm39351, LOC108168421, Ube2u, Hn1, Gm32534, Rpl7a, Gm10444, Rps11-ps2,
## Gm32399, Irx5, 1110001J03Rik, Gm39877, Gm30718, Gm11914, Gdap10, Rps12-ps10,
## Gm30181, Gm5620, Rpl13a, Gm41320, Gm3386, Olfr54, 5031434C07Rik, Rps15a,
## Gm40812, LOC108167334, LOC102633308, Tmem252, Mgea5, Gm41912, LOC102632541,
## Gm41027, Gm33100, 5730408K05Rik, Gm40372, Gm6158, Gm38483, Gm32137, Gm39850,
## Cd1d2, Gm30073, Minos1, Gm39871, LOC105246245, Gm32549, B930092H01Rik,
## Tcp10c, Zfp607, Vmn2r45, Hyi, Gm38808, LOC102633666, Gm34292, Gm38416, Dcx,
## Rpl36a, Gm37053, Gm8430, Mar-06, Rpl38, Gm38621, D17H6S56E-5, Bnip3l-ps,
## LOC108168239, Rps19, Gm33285, BC037034, Ppy, Gm6644, Gm29879, Gm8784, Gm34496,
## Gm32269, Gm42042, Gm12911, Gm31727, Gm38663, Gm18798, BE949265, Optc, Tceb1,
## 4931431B13Rik, Gm12338, Gm8995, LOC108167687, Gm7669, LOC105244208, Fam46a,
## Cyp2a22, Gm38761, Rpl18, Gm8866, BC002163, Rps4x-ps, Fam64a, D130037M23Rik,
## Rpl4, Gm5087, Xrcc6bp1, Gm35321, 6330407A03Rik, Gm41192, C330006A16Rik,
## Gm5347, Gm35449, LOC108168951, Gm31842, Peo1, Fam131c, Rpl17, Gm33097,
## Gm29826, Bzrap1, Wbscr22, 2810474O19Rik, Gm33046, Tmem194, Scrt1, Enthd2,
## Emc8-1190005i06rik, Gm10231, Rps3, Gm17057, Vmn1r-ps76, Gm31721, Chrna2,
## Whsc1, Gm29824, 4930543N07Rik, Gm31012, Gm3776, Gm10033, Gm26849, Gm41527,
## Rpl35, Rps18, Gm30145, Gm36754, Gm21158, Gm8463, Gm33651, LOC108167923, Rps15,
## Tbx20, Gm41798, Gm33055, Gm29938, Gm38960, LOC108167404, LOC108168071, Gm20850,
## LOC102634904, Gm32714, Gm42110, Gm31724, Vmn1r-ps78, Gm39697, 0610037L13Rik,
## Rpl23, Gm38957, Gm31208, Fam208b, Gm26688, Gm40403, Fam188a, 1500011K16Rik,
## Smek1, Gm31217, Gm32957, Rps16-ps2, Gm2163, LOC105244649, Sssca1, 2700060E02Rik,
## Gm31758, Obox3-ps5, Hpd, LOC108168233, Gm9892, Gm17250, Gm4332, LOC108167728,
## Gm30346, Gm15710, Rpl13, Gm4754, Gm14680, Gtsf1l, Hmha1, Gm38450, Gm3724,
## Gm39827, Cyp2b26-ps, Gm5486, Lexm, Wfdc6a, Gm5130, LOC108168025, Gm41549,
## Gm18097, Gm5879, Gm4792, Gm9794, Pax4, Gm32394, Cdrt4, Gm4149, Sprr2a2,
## Sepn1, Gm30789, Gm30532, Gm42201, Rpl5, Nodal, Vmn2r44, Gm41519, Rps15a-ps6,
## Rtbdn, Gm4739, Gm39527, LOC102638891, Hbb-bh2, LOC101056014, Gm42213, Gm29933,
## Gm7536, Rps29, Trmt112-ps2, Gm42034, Trim50, LOC102632031, Gm14303, Gm10774,
## Gm39162, LOC108169055, Gm41569, Kirrel2, Serpinb9c, Gm31228, LOC108167924,
## Gm35644, Tmem55b, Gm19600, Gm10222, Gm7180, 2310036O22Rik, Gm14204, Gm9255,
## Rpl34, LOC108167523, Rpl10, Mum1, Gm41220, Ube2n-ps1, Gm3047, Rps26,
## LOC108167669, Fhad1os2, Gm38566, Gm42324, LOC108168511, Gm42344, Gm30330,
## LOC105246089, Gm32727, Gm9745, Rpl39, Rps27a, Olfr287, AV320801, Pnma5,
## Gm42308, 4833422M21Rik, Gm42013, Npm3-ps1, Gm32190, Atp5sl, C030037F17Rik,
## Gm38636, Gm5781, Gm16350, Rps15a-ps7, Gm35343, Pla2g2a, Rps21, Gm16365, Gm32709,
## Gm35396, Gm32756, LOC102640520, Rpl12, Gm33083, Tceb2, Gm33733, LOC108167852,
## Gm39586, Rpl3, Gm6485, 1810043H04Rik, 1700024F13Rik, Gm32670, Gm44504, Gm6988,
## Cyp2c52-ps, Gm38599, Ankrd34a, Gm34521, Gm41302, Elfn2, Rpl14, Gm31048, Barx1,
## Snap25, 9030622O22Rik, Gm33433, 2610524H06Rik, LOC100503338, Gm41204, Gm15500,
## LOC108167658, Selk, Gm30461, Gm41313, Gm42060, Mettl20, Gm34689, Pyhin1,
## Scgb2b8-ps, Gm30658, Rpl19, Gm41166, Gm39665, Igkv3-12, Gm30970, Gm40009,
## Rps10, Fam63a, 5730488B01Rik, Gm34995, Gm35365, Gm16432, Gm40246, Gm13142,
## Rpl37rt, Gm40278, 3110002H16Rik, Gm41995, Fam175b, Gm15723, LOC108167337,
## Gm35884, Obox3-ps6, Gm30005, Gm5278, Gm39487, LOC108168399, Rps20, Gm38732,
## Gm29688, Gm34350, Trpa1, Gbp11, Gm10619, Gm33624, Myom2, Gm31340, LOC108168109,
## Nhp2l1, LOC108168293, C530044C16Rik, Gm41526, Gm41865, Ang6, Serpina1a,
## Gbp2-ps, Gm32099, Gm32422, Rps23-ps1, Gm19303, Gm36188, Rps6ka3, Gm30724,
## Gm40034, Gm7730, 2810025M15Rik, Fam65a, Gm17733, Gm5740, Aes, Gm42398, Gm34865,
## LOC102640586, 1700034E13Rik, Gm2773, Gm41336, Gm13420, Gm8451, Rps9, Gm30364,
## 1810026J23Rik, Gm35208, LOC100534331, LOC108169173, Gm11478, Rps14, Gm40799,
## Gm30286, Gm39866, Gm14199, Gm32525, Gm19764, Morf4l1-ps1, Gm38782, Gm7068,
## Vmn1r-ps71, Gm7666, Gm12371, Gm35040, LOC108168082, Dpt, Gm9703, Ugt2b36,
## D17Wsu92e, Gm30212, Gm40191, Rpl9-ps6, Gm40446, Rps12-ps4, Gm40680, Zufsp,
## LOC108167317, Rps13-ps2, Gm8159, D230017M19Rik, Amd-ps4, Zic3, Gm14586, Rpl11,
## Gm20687, 9330133O14Rik, Gltscr1l, Gm32335, Gm15163, Tcp10a, Snhg7, Gm32370,
## Gm4855, Gm32896, Gm38554, Gm4956, Gm35760, Gm28166, Gm32276, Kdelc1, Gm20836,
## Cyp2j11, Gm32450, Rplp2, Gm34070, Gm9843, Gm34472, LOC108167644, LOC102637814,
## Gm1966, Gm33531, Gm30628, Gm2999, Gm36670, Rpl41, Gm3086, Gm14494, Gm40733,
## Gm30524, Gm14532, Gm11683, Cecr5, Gltscr2, Gm31546, LOC108168860, Gm39101,
## Gm30215, Myocd, Gm35118, Pcsk2os1, Gm15610, Gm4827, Ngfrap1, Gm31102, Gm32545,
## Gm35078, Gm40905, LOC108168267, Rps15a-ps5, Gm30548, LOC108169051, Rps27a-ps2,
## Gm27177, Rps12, Gm30368, Csrp2bp, Cyp2j15-ps, Rpl21, Rps16, Casc5, Gm18075,
## Gm16701, Prap1, Atp4b, Gm40874, Rn18s-rs5, Csta1, Rpl10a-ps1, Fgf4, Pcdhga3,
## LOC105245440, Gm34317, Gm8267, Gm30400, Gm33983, Pisd-ps2, 4930432L08Rik,
## 9430014N10Rik, Gm36603, Vmn1r63, LOC108167528, Gm42088, LOC108169076,
## Gm39160, Krt88, Cfap161, Gm35478, Rplp1, Rpl30, Gm39325, Gm12496, Gm13268,
## Kcnq4, Kdelc2, Btnl6, Gm3219, Gm11682, Gm32017, LOC108167997, Gm39481, Pex5l,
## Suv420h1, LOC107988026, Morf4l1b, Fam109b, Gm16348, 6820431F20Rik, Gm34333,
## LOC105246961, Asun, LOC102641351, Gm33893, Hottip, Gm10123, Gm33068, Gm8926,
## Gm33950, Gm36025, Prl7a1, LOC108169047, A430046D13Rik, Gm10857, Zscan30,
## Gm42235, Gm32110, Rpl7, Igf2bp1, Gm31042, Nat6, D930030F02Rik, LOC105243337,
## Gm2803, Gm41508, Gm9712, Fam60a, Tpsab1, LOC102634709, LOC108168357, Gm13528,
## Gm31962, Rpl18-ps1, Gm41401, 6330416G13Rik, Platr11, 4833447I15Rik, Gm32348,
## Gm42259, Gm5907, Gm9769, LOC108168928, Rpl7l1, Gm36048, Gm40336, Gm2477,
## LOC102631912, Gm38512, Gm39033, Fam134a, Gm36220, 4933427G17Rik, Gm35104,
## Gm41892, LOC108167447, LOC100534390, LOC105244803, LOC108167686, 1700112E06Rik,
## Gm40363, 6430584L05Rik, Gm31078, Gm41209, Mar-08, Synj2bp-cox16, Gm30140,
## Gm40159, Gm26232, Olfr1418, Gm13248, Dgcr14, Gm16505, Gm34388, Gm38515,
## 5930403L14Rik, Gm6498, Gm39851, Fam179b, Gm36596, LOC108168889, 4933413G19Rik,
## Cmtm5, Dirc2, Gm34058, Gm41926, Gm34667, Gm30352, Gm38767, Gm15387, Myh7,
## Ang-ps2, 4831440E17Rik, Wnt2, Gm35765, Gm34444, Gm12119, 4931406H21Rik, Gm40349,
## LOC108167465, 1700112H15Rik, Pllp, 1700026F02Rik, Rps17, Gm10221, Gm35155,
## Prss43, Adh4, Gm32374, Nudc-ps1, Sox10, Gm39974, Gm4184, 1700031M16Rik, Gm39337,
## LOC108167633, Gm2048, Gm5682, Etd, Kif26a, Gm32579, LOC102642832, 1700018B08Rik,
## LOC108167604, Fgb, Mgat5b, Gm32328, Gm7426, Gm9835, LOC108168813, LOC108167469,
## Gm42325, Gm18837, LOC105242790, Hoxc5, LOC108167878, Itln1, Gm32220, Rpl6,
## Opalin, Fam96b, Nup62cl, C330016L05Rik, Gm15742, Gm31244, Gm31473, Gm42250,
## Ugt1a10, Gm38499, Rpl31, LOC105243608, Cacna1h, Gm6650, Gm38993, Gm34669,
## Best2, Gm40550, 1110008F13Rik, Rpl10-ps3, Gm31298, Hils1, C230062I16Rik,
## Gm30936, Mir6236, Gm29955, Tekt3, Gm20684, LOC108168493, Rps11, Gm30086,
## Gm39241, Mar-05, LOC108169093, Speer4a, Gm7265, Stoml3, Gm41836, 3110062M04Rik,
## Tmem114, LOC102632770, Gm7338, LOC102640103, Gm41359, Gm4726, Rps3a1,
## Gm12346, Gm31265, Gm32646, Olfr129, Ssfa2, Serpinb1b, A730089K16Rik, Gm41699,
## Gm9207, 4930447F24Rik, Gm4750, Gm10224, LOC108168488, Gm36386, LOC108167736,
## LOC108167896, Pcp4, Rpl7a-ps12, Wnt1, Defb26, Gm15612, Gm30922, LOC100503946,
## Mtl5, Gm3050, LOC102642435, Gm29943, Spint3, LOC102635252, Rps8, Gm34016,
## Gm36229, Gm18221, Rpl28-ps1, Gm9919, Gm39938, Gm39546, Rps27rt, Gm32069,
## Gm30848, LOC108167773, Gm41598, Pcdha11, 1700003C15Rik, Fam65b, Gm34749,
## Gm40986, 6030466F02Rik, Gm33236, Gm41056, Grxcr2, Amica1, Fam69a, Btnl7-ps,
## Mtss1l, Gm31671, Olfr671, Sep-15, 4930447M23Rik, Rpsa, Raver1, 4833427
# Subsetting the Seurat object to exclude certain classifications
MKP3HTO = subset(x = MKP3HTO, subset = CSclassification=="pDC-depletion", invert = TRUE)
# Setting the identity classes for the Seurat object
Idents(MKP3HTO) = MKP3HTO$Idents
# Creating and printing violin plots to visualize the module scores
# Different plots show the distribution of scores across various classifications and identities
print(VlnPlot(MKP3HTO, features = "final_genes1", group.by ="CSclassification"))
print(VlnPlot(MKP3HTO, features = "final_genes1", split.by ="CSclassification"))
## The default behaviour of split.by has changed.
## Separate violin plots are now plotted side-by-side.
## To restore the old behaviour of a single split violin,
## set split.plot = TRUE.
##
## This message will be shown once per session.
print(VlnPlot(MKP3HTO, features = "final_genes1", group.by ="CSclassification", split.by = "Idents"))
print(VlnPlot(MKP3HTO, features = "final_genes1", group.by = "Idents"))
# Subsetting the Seurat object for cluster 3 (MK-MEP_cycling)
subset_obj = subset(x = MKP3HTO, subset = seurat_clusters == 3)
# Setting the identities in the subset object based on CSclassification
Idents(subset_obj) = subset_obj$CSclassification
# Extracting gene names for the cluster of interest
gene_names <- rownames(subset_obj@assays$RNA@scale.data)[subset_obj$seurat_clusters == 3]
# Finding DE genes for PD vs Bl6 condition using Wilcoxon test
de_results <- FindMarkers(
subset_obj,
ident.1 = "Platelet-depletion",
ident.2 = "Bl6",
test.use = "wilcox",
slot= "scale.data",
genes.use = gene_names # Using specified gene names for comparison
)
# Preparing a volcano plot for visualizing DE results
print("Manuscript Figure 4h")
## [1] "Manuscript Figure 4h"
volcano = EnhancedVolcano(de_results,
lab = rownames(de_results),
x = 'avg_diff',
y = 'p_val_adj',
title = "PD vs CNTRL in cycling_MK_MEP cluster",
pCutoff = 0.05, # P-value cutoff for significance
FCcutoff = 0.25, # Fold change cutoff for significance
# Additional plot parameters for aesthetics and readability
pointSize = 3.0,
colAlpha = 1,
legendLabSize = 12,
legendIconSize = 4.0,
drawConnectors = TRUE,
widthConnectors = 0.2,
max.overlaps = 30,
colConnectors = 'grey30',
labSize = 6.0)
print(volcano)
## Warning: ggrepel: 300 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
# Repeat the process for PD vs pDC-Platelet-depletion condition
de_results <- FindMarkers(
subset_obj,
ident.1 = "Platelet-depletion",
ident.2 = "pDC-Platelet-depletion",
test.use = "wilcox",
slot= "scale.data",
genes.use = gene_names # Using specified gene names for comparison
)
# Preparing another volcano plot for the second condition comparison
print("Manuscript Figure 4i")
## [1] "Manuscript Figure 4i"
volcano = EnhancedVolcano(de_results,
lab = rownames(de_results),
# Parameters similar to the previous volcano plot
x = 'avg_diff',
y = 'p_val_adj',
title = "PD vs CNTRL in cycling_MK_MEP cluster",
pCutoff = 0.05,
FCcutoff = 0.25,
# Additional plot parameters
pointSize = 3.0,
colAlpha = 1,
legendLabSize = 12,
legendIconSize = 4.0,
drawConnectors = TRUE,
widthConnectors = 0.2,
max.overlaps = 30,
colConnectors = 'grey30',
labSize = 6.0)
print(volcano)
DefaultAssay(MKP3HTO) <- "RNA"
## response to type I interferon genes as bulk_seq genes
bulk_seq = read.csv("~/Downloads/GOBP_RESPONSE_TO_TYPE_I_INTERFERON.csv", header = TRUE)
print("response to interferon genes")
## [1] "response to interferon genes"
head(bulk_seq)
## NAME PROBE
## 1 row_0 TYK2
## 2 row_1 HLA-A
## 3 row_2 PTPN6
## 4 row_3 CACTIN
## 5 row_4 BST2
## 6 row_5 YTHDF2
## DESCRIPTION.br..from.dataset.
## 1 tyrosine kinase 2 [Source:HGNC Symbol;Acc:HGNC:12440]
## 2 major histocompatibility complex, class I, A [Source:HGNC Symbol;Acc:HGNC:4931]
## 3 protein tyrosine phosphatase non-receptor type 6 [Source:HGNC Symbol;Acc:HGNC:9658]
## 4 cactin, spliceosome C complex subunit [Source:HGNC Symbol;Acc:HGNC:29938]
## 5 bone marrow stromal cell antigen 2 [Source:HGNC Symbol;Acc:HGNC:1119]
## 6 YTH N6-methyladenosine RNA binding protein 2 [Source:HGNC Symbol;Acc:HGNC:31675]
## GENE.SYMBOL GENE_TITLE RANK.IN.GENE.LIST RANK.METRIC.SCORE RUNNING.ES
## 1 null null 54 2.885287 0.04354477
## 2 null null 133 2.323923 0.07665094
## 3 null null 194 2.121879 0.10751816
## 4 null null 234 2.037717 0.13822223
## 5 null null 255 1.982451 0.16911610
## 6 null null 289 1.911551 0.19812344
## CORE.ENRICHMENT
## 1 Yes
## 2 Yes
## 3 Yes
## 4 Yes
## 5 Yes
## 6 Yes
## getting the top 40 genes from response to type I interferon genes
bulk_seq = as.data.frame(bulk_seq)
rownames(bulk_seq) = bulk_seq$PROBE
bulk_seq_genes = head(rownames(bulk_seq), 40)
## converting the genes to mouse gene form - "Tyk2" "Hla-a"
bulk_seq_genes = tolower(bulk_seq_genes)
firstup = function(x) {
substr(x, 1, 1) = toupper(substr(x, 1, 1))
x
}
bulk_seq_genes = firstup(bulk_seq_genes)
print("response to interferon genes after processing")
## [1] "response to interferon genes after processing"
bulk_seq_genes
## [1] "Tyk2" "Hla-a" "Ptpn6" "Cactin" "Bst2" "Ythdf2"
## [7] "Hsp90ab1" "Irf9" "Ythdf3" "Ube2k" "Rnasel" "Setd2"
## [13] "Shmt2" "Cnot7" "Irf1" "Sp100" "Irak1" "Abce1"
## [19] "Mul1" "Egr1" "Adar" "Ifitm1" "Rsad2" "Jak1"
## [25] "Irf5" "Ifitm2" "Ptpn2" "Tbk1" "Dcst1" "Oasl"
## [31] "Mavs" "Xaf1" "Trim6" "Irf2" "Ifna1" "Ifnar2"
## [37] "Ifitm3" "Usp18" "Cdc37" "Ifna16"
### renaming the idents of seurat obj to conditions instead of clusters to calculate fold change between conditions for the genes
Idents(MKP3HTO) = MKP3HTO$CSclassification
subset_obj = subset(x = MKP3HTO, subset = seurat_clusters == 3)
Idents(subset_obj) = subset_obj$CSclassification
# finding the DE results for all genes of the seurat object
gene_names <- rownames(subset_obj@assays$RNA@scale.data)[subset_obj$seurat_clusters == 3]
de_results <- FindMarkers(
subset_obj,
ident.1 = "Platelet-depletion",
ident.2 = "Bl6",
test.use = "wilcox",
slot= "scale.data",
genes.use = gene_names # Specify the gene names for the comparison
)
# getting the interferon response genes
interferon = read.csv("~/Downloads/GOBP_RESPONSE_TO_TYPE_I_INTERFERON.csv", header = TRUE)
## converting the genes to mouse gene form - "Tyk2" "Hla-a"
interferon$PROBE = tolower(interferon$PROBE)
firstup = function(x) {
substr(x, 1, 1) = toupper(substr(x, 1, 1))
x
}
# getting the top 40 interferon response genes based on the heatmap of bulk-seq results
interferon$PROBE = firstup(interferon$PROBE)
rownames(interferon) = interferon$PROBE
interferon = head(rownames(interferon), 40)
# The interferon genes are
print(interferon)
## [1] "Tyk2" "Hla-a" "Ptpn6" "Cactin" "Bst2" "Ythdf2"
## [7] "Hsp90ab1" "Irf9" "Ythdf3" "Ube2k" "Rnasel" "Setd2"
## [13] "Shmt2" "Cnot7" "Irf1" "Sp100" "Irak1" "Abce1"
## [19] "Mul1" "Egr1" "Adar" "Ifitm1" "Rsad2" "Jak1"
## [25] "Irf5" "Ifitm2" "Ptpn2" "Tbk1" "Dcst1" "Oasl"
## [31] "Mavs" "Xaf1" "Trim6" "Irf2" "Ifna1" "Ifnar2"
## [37] "Ifitm3" "Usp18" "Cdc37" "Ifna16"
# subsetting the de_results data frame to only get the interferon genes
de_results_inf = na.omit(de_results[interferon, ])
de_results_inf$Gene = rownames(de_results_inf)
# adding a column called FDR which adjustes p val for all the interferon genes
de_results_inf$FDR = p.adjust(de_results_inf[rownames(de_results_inf), 'p_val'])
# getting the bulk_seq results file
bulk_de_genes = read.csv("~/Downloads/MKP_samples_log2FC_forVisha.csv", header = TRUE)
rownames(bulk_de_genes) = bulk_de_genes$Gene_name
genes_pd_vs_cntrl_pos <- bulk_de_genes
# subsetting the bulk-seq results to only focus on PD vs Cntrl columns
genes_pd_vs_cntrl_pos = genes_pd_vs_cntrl_pos[, c("log2FC_PD_vs_Ctrl", "FDR_PD_vs_control")]
# subsetting the interferon genes and their corresponding FC and FDR from bulk-seq results
genes_pd_vs_cntrl_pos = genes_pd_vs_cntrl_pos[rownames(de_results_inf), ]
genes_pd_vs_cntrl_pos = na.omit(genes_pd_vs_cntrl_pos)
genes_pd_vs_cntrl_pos$Gene = rownames(genes_pd_vs_cntrl_pos)
# merging the interferon fc and fdr values to the bulk-seq fc and fdr values for the condition PD vs CNTRL based on the common interferon genes (36 of them)
merged_data <- na.omit(merge(de_results_inf, genes_pd_vs_cntrl_pos, by = "Gene", all = TRUE))
rownames(merged_data) = merged_data$Gene
print("Manuscript Extended Data Figure 10 e")
## [1] "Manuscript Extended Data Figure 10 e"
# Create the scatterplot for the interferon genes. Plotting FC of scRNAseq on the x-axis and the FC of bulk-seq on the y-axis and colouring based on FDR calculated above using p.adjust
p <- ggplot(data = merged_data, aes(x = avg_diff, y = log2FC_PD_vs_Ctrl, label = Gene, color = FDR)) +
geom_point() +
geom_text(hjust = 0.001, vjust = 0.001, nudge_x = 0.001, nudge_y = 0.001, size = 3) +
scale_color_gradient(low = "blue", high = "red") +
labs(x = "Single Cell Cluster MK-MEP_cycling", y = "Bulk Seq PD vs Bl6") +
ggtitle("Scatterplot of Fold Changes for interferon response genes PD vs Bl6") +
ylim(min(merged_data$log2FC_PD_vs_Ctrl) * 0.9, max(merged_data$log2FC_PD_vs_Ctrl) * 1.1) + # Adjusting y-axis limits
xlim(min(merged_data$avg_diff) * 0.9, max(merged_data$avg_diff) * 1.1) # Adjusting x-axis limits
print(p)
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## Warning: Removed 1 rows containing missing values (`geom_text()`).
######################################################################################
# getting the common DE genes from cluster 3 PD vs CNTL and bulk-seq PD vs CNTRL from already calculated file
cluster_3_Platelet_depletion_vs_Bl6_common_genes_pos = read.csv("~/Downloads/cluster_3_Platelet-depletion_vs_Bl6_common_genes_pos.csv")
cluster_3_Platelet_depletion_vs_Bl6_common_genes_pos = cluster_3_Platelet_depletion_vs_Bl6_common_genes_pos$x
# subsetting the de_results to only contain the common DE genes
common = na.omit(de_results[cluster_3_Platelet_depletion_vs_Bl6_common_genes_pos, ])
common$Gene = rownames(common)
# calculating the FDR for common genes
common$FDR = p.adjust(common[rownames(common), 'p_val'])
genes_pd_vs_cntrl_pos <- bulk_de_genes
# subsetting the bulk-seq results to only focus on PD vs Cntrl columns
genes_pd_vs_cntrl_pos = genes_pd_vs_cntrl_pos[, c("log2FC_PD_vs_Ctrl", "FDR_PD_vs_control")]
# subsetting the common DE genes and their corresponding FC and FDR from bulk-seq results
genes_pd_vs_cntrl_pos = genes_pd_vs_cntrl_pos[cluster_3_Platelet_depletion_vs_Bl6_common_genes_pos, ]
genes_pd_vs_cntrl_pos = na.omit(genes_pd_vs_cntrl_pos)
genes_pd_vs_cntrl_pos$Gene = rownames(genes_pd_vs_cntrl_pos)
# merging the common DE genes' fc and fdr values to the bulk-seq fc and fdr values for the condition PD vs CNTRL based on the common interferon genes (61 of them)
merged_data <- merge(common, genes_pd_vs_cntrl_pos, by = "Gene", all = TRUE)
rownames(merged_data) = merged_data$Gene
merged_data = na.omit(merged_data)
print("Manuscript Extended Data Figure 10 d")
## [1] "Manuscript Extended Data Figure 10 d"
# Create the scatterplot for the =common DE genes. Plotting FC of scRNAseq on the x-axis and the FC of bulk-seq on the y-axis and colouring based on FDR calculated above using p.adjust
p <- ggplot(data = merged_data, aes(x = avg_diff, y = log2FC_PD_vs_Ctrl, label = Gene, color = FDR)) +
geom_point() +
geom_text(hjust = 0.001, vjust = 0.001, nudge_x = 0.001, nudge_y = 0.001, size = 3) +
scale_color_gradient(low = "blue", high = "red") +
labs(x = "Single Cell Cluster MK-MEP_cycling", y = "Bulk Seq PD vs Bl6") +
ggtitle("Scatterplot of Fold Changes for interferon response genes PD vs Bl6") +
ylim(min(merged_data$log2FC_PD_vs_Ctrl) * 0.9, max(merged_data$log2FC_PD_vs_Ctrl) * 1.1) + # Adjusting y-axis limits
xlim(min(merged_data$avg_diff) * 0.9, max(merged_data$avg_diff) * 1.1) # Adjusting x-axis limits
print(p)
# enrichment results of the common genes between bulk-seq and single-cell MK-MEP_cycling cluster for PD vs Bl6 conditions.
enrichment_results <- enrichR::enrichr(
genes = cluster_3_Platelet_depletion_vs_Bl6_common_genes_pos,
databases = "GO_Biological_Process_2023"
)
## Uploading data to Enrichr... Done.
## Querying GO_Biological_Process_2023... Done.
## Parsing results... Done.
enrichment_results = as.data.frame(enrichment_results)
enrichment_data_pos <- data.frame(
Term = enrichment_results$GO_Biological_Process_2023.Term,
Overlap = enrichment_results$GO_Biological_Process_2023.Overlap,
P.value = enrichment_results$GO_Biological_Process_2023.P.value,
Adjusted.P.value = enrichment_results$GO_Biological_Process_2023.Adjusted.P.value,
Odds.Ratio = enrichment_results$GO_Biological_Process_2023.Odds.Ratio,
Combined.Score = enrichment_results$GO_Biological_Process_2023.Combined.Score,
Genes = enrichment_results$GO_Biological_Process_2023.Genes
)
print("Manuscript Extended Data Figure 10 d")
## [1] "Manuscript Extended Data Figure 10 d"
enrich_pos = plotEnrich(enrichment_data_pos)
## Warning in plotEnrich(enrichment_data_pos): There are duplicated trimmed names
## in the plot, consider increasing the 'numChar' setting.
print(enrich_pos)